
package org.apache.solr.search;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;

/**
 * A variation on the Lucene QueryParser which knows about the field 
 * types and query time analyzers configured in Solr's schema.xml.
 *
 * <p>
 * This class also deviates from the Lucene QueryParser by using 
 * ConstantScore versions of RangeQuery and PrefixQuery to prevent 
 * TooManyClauses exceptions.
 * </p> 
 *
 * <p>
 * If a magic field name is used in a term or phrase query, the value 
 * is parsed by a nested query parser: "<code>_val_</code>" is parsed 
 * as a function, and "<code>_query_</code>" is parsed as a nested query.
 * </p>
 */
public class SolrQueryParser extends QueryParser {

    /** Schema of the request this parser was created for; used to resolve fields and field types. */
    protected final IndexSchema schema;
    /** The QParser that created this parser; used for nested queries and handed to field types. */
    protected final QParser parser;
    /** Field name supplied at construction time; may be null (see {@link #checkNullField}). */
    protected final String defaultField;

    /**
     * Identifies the list of all known "magic fields" that trigger special
     * parsing behavior
     */
    public enum MagicFieldName {

        VAL("_val_", "func"), QUERY("_query_", null);

        /** The field name as it appears in the query string. */
        public final String field;
        /** Default parser type passed to {@code QParser.subQuery}; null for {@link #QUERY}. */
        public final String subParser;

        MagicFieldName(final String field, final String subParser) {
            this.field = field;
            this.subParser = subParser;
        }

        /** Returns the field name, which is also the key used by {@link #get(String)}. */
        @Override
        public String toString() {
            return field;
        }

        // field name -> constant, filled once when the enum is initialized
        private static final Map<String, MagicFieldName> lookup = new HashMap<>();

        static {
            for (MagicFieldName s : EnumSet.allOf(MagicFieldName.class)) {
                lookup.put(s.toString(), s);
            }
        }

        /**
         * Looks up a magic field by its field name.
         *
         * @param field the field name used in the query
         * @return the matching constant, or null if the name is not a magic field
         */
        public static MagicFieldName get(final String field) {
            return lookup.get(field);
        }
    }

    // implementation detail - caching ReversedWildcardFilterFactory based on type.
    // A null value is a valid entry meaning "this type has no such factory".
    private Map<FieldType, ReversedWildcardFilterFactory> leadingWildcards;

    /**
     * Creates a parser that uses the query analyzer of the request's schema.
     *
     * @param parser the QParser this parser works on behalf of
     * @param defaultField field to use when the query does not name one
     */
    public SolrQueryParser(QParser parser, String defaultField) {
        this(parser, defaultField, parser.getReq().getSchema().getQueryAnalyzer());
    }

    /**
     * Creates a parser with an explicit analyzer. The Lucene match version is
     * taken from the core's SolrConfig. Position increments and leading
     * wildcards are enabled, and lowercasing of expanded terms is disabled.
     *
     * @param parser the QParser this parser works on behalf of
     * @param defaultField field to use when the query does not name one
     * @param analyzer analyzer handed to the Lucene QueryParser
     */
    public SolrQueryParser(QParser parser, String defaultField, Analyzer analyzer) {
        super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, analyzer);
        this.schema = parser.getReq().getSchema();
        this.parser = parser;
        this.defaultField = defaultField;
        setEnablePositionIncrements(true);
        setLowercaseExpandedTerms(false);
        setAllowLeadingWildcard(true);
    }

    /**
     * Returns the ReversedWildcardFilterFactory found in the analysis chain of
     * the given field type, caching the answer (including "none") per type.
     *
     * @param fieldType the field type whose analyzer is inspected
     * @return the first ReversedWildcardFilterFactory in the chain, or null if
     *         the analyzer is not a TokenizerChain or contains no such factory
     */
    protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
        if (leadingWildcards == null) {
            leadingWildcards = new HashMap<>();
        }
        ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType);
        // A non-null value is a cache hit; a null value is a hit only when the
        // type itself has been recorded (negative result cached earlier).
        if (fac != null || leadingWildcards.containsKey(fieldType)) {
            return fac;
        }

        Analyzer a = fieldType.getAnalyzer();
        if (a instanceof TokenizerChain) {
            // examine the indexing analysis chain if it supports leading wildcards
            TokenizerChain tc = (TokenizerChain) a;
            for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
                if (factory instanceof ReversedWildcardFilterFactory) {
                    fac = (ReversedWildcardFilterFactory) factory;
                    break;
                }
            }
        }

        // store the result even when null so the chain is scanned only once per type
        leadingWildcards.put(fieldType, fac);
        return fac;
    }

    /**
     * Rejects queries that have neither an explicit field nor a default field.
     *
     * @param field the field name supplied for the clause, possibly null
     * @throws SolrException with BAD_REQUEST if both the given field and the
     *         default field are null
     */
    private void checkNullField(String field) throws SolrException {
        if (field == null && defaultField == null) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no field name specified in query and no default specified via 'df' param");
        }
    }

    /**
     * Runs the multi-term analyzer of a TextField over a term fragment.
     *
     * @param field the field name the fragment belongs to
     * @param part the raw term text, possibly null
     * @param fieldType the type of the field
     * @return the analyzed text, or {@code part} unchanged when it is null,
     *         the field is not in the schema, or the type is not a TextField
     */
    protected String analyzeIfMultitermTermText(String field, String part, FieldType fieldType) {
        if (part == null) {
            return part;
        }

        SchemaField sf = schema.getFieldOrNull(field);
        if (sf == null || !(fieldType instanceof TextField)) {
            return part;
        }
        Analyzer multiTermAnalyzer = ((TextField) fieldType).getMultiTermAnalyzer();
        return TextField.analyzeMultiTerm(field, part, multiTermAnalyzer).utf8ToString();
    }

    /**
     * Builds the query for a term or phrase clause.
     *
     * <p>
     * Magic field names (see {@link MagicFieldName}) are parsed by a nested
     * QParser. For fields known to the schema, tokenized types are handled
     * by the Lucene parser, and all other types build the query themselves.
     * Fields unknown to the schema fall back to the Lucene parser.
     * </p>
     *
     * @param field the field name of the clause
     * @param queryText the term or phrase text
     * @param quoted whether the text was quoted in the query string
     * @throws SolrException if no field and no default field is available
     */
    @Override
    protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
        checkNullField(field);
        // intercept magic field name of "_" to use as a hook for our
        // own functions.
        if (field.charAt(0) == '_' && parser != null) {
            MagicFieldName magic = MagicFieldName.get(field);
            if (null != magic) {
                QParser nested = parser.subQuery(queryText, magic.subParser);
                return nested.getQuery();
            }
        }
        SchemaField sf = schema.getFieldOrNull(field);
        if (sf != null) {
            FieldType ft = sf.getType();
            // delegate to type for everything except tokenized fields
            if (ft.isTokenized()) {
                // a TextField may ask for phrase queries even when the text was not quoted
                boolean autoPhrase = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
                return super.getFieldQuery(field, queryText, quoted || autoPhrase);
            }
            return ft.getFieldQuery(parser, sf, queryText);
        }

        // default to a normal field query
        return super.getFieldQuery(field, queryText, quoted);
    }

    /**
     * Builds a range query by delegating to the field's type.
     *
     * @param field the field name of the clause
     * @param part1 lower bound text
     * @param part2 upper bound text
     * @param startInclusive whether the lower bound is inclusive
     * @param endInclusive whether the upper bound is inclusive
     * @throws SolrException if no field and no default field is available
     */
    @Override
    protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
        checkNullField(field);
        SchemaField sf = schema.getField(field);
        return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
    }

    /**
     * Builds a prefix query after passing the prefix through the field's
     * multi-term analysis (see {@link #analyzeIfMultitermTermText}).
     *
     * @param field the field name of the clause
     * @param termStr the prefix text
     * @throws SolrException if no field and no default field is available
     */
    @Override
    protected Query getPrefixQuery(String field, String termStr) throws ParseException {
        checkNullField(field);

        termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));

        // Solr has always used constant scoring for prefix queries.  This should return constant scoring by default.
        return newPrefixQuery(new Term(field, termStr));
    }

    /**
     * Builds a wildcard query.
     *
     * <p>
     * {@code *:*} becomes a match-all query. If the field type's analysis
     * chain contains a ReversedWildcardFilterFactory, an AutomatonQuery is
     * built that either matches the reversed form of the pattern or excludes
     * terms starting with the marker character. Otherwise a plain wildcard
     * query is returned.
     * </p>
     *
     * @param field the field name of the clause
     * @param termStr the wildcard pattern
     * @throws SolrException if no field and no default field is available
     */
    @Override
    protected Query getWildcardQuery(String field, String termStr) throws ParseException {
        checkNullField(field);
        // *:* -> MatchAllDocsQuery
        if ("*".equals(field) && "*".equals(termStr)) {
            return newMatchAllDocsQuery();
        }
        FieldType fieldType = schema.getFieldType(field);
        termStr = analyzeIfMultitermTermText(field, termStr, fieldType);
        // can we use reversed wildcards in this field?
        ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(fieldType);
        if (factory != null) {
            Term term = new Term(field, termStr);
            // fsa representing the query
            Automaton automaton = WildcardQuery.toAutomaton(term);
            // TODO: we should likely use the automaton to calculate shouldReverse, too.
            if (factory.shouldReverse(termStr)) {
                automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
                // the return value is unused: this relies on reverse() modifying the automaton in place
                SpecialOperations.reverse(automaton);
            }
            else {
                // reverse wildcardfilter is active: remove false positives
                // fsa representing false positives (markerChar*)
                Automaton falsePositives = BasicOperations.concatenate(
                        BasicAutomata.makeChar(factory.getMarkerChar()),
                        BasicAutomata.makeAnyString());
                // subtract these away
                automaton = BasicOperations.minus(automaton, falsePositives);
            }
            return new AutomatonQuery(term, automaton) {
                // override toString so its completely transparent
                @Override
                public String toString(String field) {
                    StringBuilder buffer = new StringBuilder();
                    if (!getField().equals(field)) {
                        buffer.append(getField());
                        buffer.append(":");
                    }
                    buffer.append(term.text());
                    buffer.append(ToStringUtils.boost(getBoost()));
                    return buffer.toString();
                }
            };
        }

        // Solr has always used constant scoring for wildcard queries.  This should return constant scoring by default.
        return newWildcardQuery(new Term(field, termStr));
    }

    /**
     * Builds a regular-expression query after passing the expression through
     * the field's multi-term analysis (see {@link #analyzeIfMultitermTermText}).
     *
     * @param field the field name of the clause
     * @param termStr the regular expression text
     * @throws SolrException if no field and no default field is available
     */
    @Override
    protected Query getRegexpQuery(String field, String termStr) throws ParseException {
        // same guard as the other query hooks, so a missing field yields a clear BAD_REQUEST
        checkNullField(field);
        termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
        return newRegexpQuery(new Term(field, termStr));
    }
}
